This file is regarding the preliminary results for the water stress greenhouse experiment - Group 5

Chapter 1 - First dealing with data

Visualization of data with plots.
# first thing to do is importing the data
# install.packages("readxl")
# readxl::read_excel("data_dec/water_stress.xlsx")
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")

After importing the data and give it the name of d0, we are going to visualize it, by creation different plots.

##Visualization of data with plots. Create many plots.

X= Date Y= Variable Y1= Plant height Y2= Leaf number Y3= Leaf lenght Y4= Leaf width Y5= Leaf area Y7= Chlorophyll

# install.packages("ggplot2", dependencies = TRUE)
library(ggplot2)

# first we are going to visualize all the species in a plot
ggplot(d0, aes(x= Date, y= Plant_height, group= PlantId, color= Treatment)) + 
  geom_line()+
  facet_grid(Species ~. )
## Warning: Removed 3 rows containing missing values (`geom_line()`).

In this code chunk we are trying to visualize one species, to know how to plot it first.

# For Solanum lycopersicum
s1 <- d0[d0$Species == "Solanum lycopersicum", ]
ggplot(s1, aes(x = Date, y = Plant_height, group = PlantId, color = Treatment)) + 
  geom_line()

Then we will create a for loop to visualize all the species and all the variables.

v1 <- c("Plant_height", "Leaf_width", "Leaf_length", "Leaf_area", "Leaf_number", "Root_length", "Chlorophyll_content", "Soil_humidity", "Electrical_conductivity")
i <- "Beta vulgaris"
variable <- "Plant_height"

for(i in levels(as.factor(d0$Species))) {
  for(variable in v1) {
    s1 <- d0[d0$Species==i, c(variable, "Week", "PlantId", "Treatment")]
    s1 <- na.exclude(s1)
    p <- ggplot(s1, aes(x= Week, y= .data[[variable]], group= PlantId, color= Treatment)) + 
      geom_line() + 
      labs(title = i)
    print(p)
  }
}

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?


## Chapter 2 - ANOVA
in this part we are trying to do an ANOVA analysis for the data


```r
#Packages
#install.packages(c("ggplot2", "ggpubr", "tidyverse", "broom", "AICcmodavg"), , dependencies = TRUE)
library(ggplot2)
library(ggpubr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ✔ purrr   0.3.5      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(broom)

#Import data
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
d0
## # A tibble: 1,260 × 23
##    Group Week  Date                Species PlantId Use   Treat…¹ Soil_…² Elect…³
##    <chr> <chr> <dttm>              <chr>   <chr>   <chr> <chr>     <dbl>   <dbl>
##  1 G1    W1    2022-10-05 00:00:00 Solanu… Slc1    cult… c          55.6    1.31
##  2 G1    W1    2022-10-05 00:00:00 Solanu… Slc2    cult… c          52.7    1.28
##  3 G1    W1    2022-10-05 00:00:00 Solanu… Slc3    cult… c          61.3    1.24
##  4 G1    W1    2022-10-05 00:00:00 Solanu… Slc4    cult… c          51      1.36
##  5 G1    W1    2022-10-05 00:00:00 Solanu… Slc5    cult… c          52.1    1.39
##  6 G1    W1    2022-10-05 00:00:00 Solanu… Slc6    cult… c          54.4    1.24
##  7 G1    W1    2022-10-05 00:00:00 Solanu… Slc7    cult… c          51.6    1.51
##  8 G1    W1    2022-10-05 00:00:00 Amaran… Arc1    wild  c          54.6    1.68
##  9 G1    W1    2022-10-05 00:00:00 Amaran… Arc2    wild  c          50.9    1.93
## 10 G1    W1    2022-10-05 00:00:00 Amaran… Arc3    wild  c          67.2    1.89
## # … with 1,250 more rows, 14 more variables: Too_dry <chr>, Plant_height <dbl>,
## #   Leaf_number <dbl>, Leaf_length <dbl>, Leaf_width <dbl>, Leaf_area <dbl>,
## #   Chlorophyll_content <dbl>, Aerial_fresh_weight <dbl>,
## #   Aerial_dry_weight <dbl>, Root_length <dbl>, Roots_fresh_weight <dbl>,
## #   Roots_dry_weight <dbl>, Mortality <chr>, Comments <lgl>, and abbreviated
## #   variable names ¹​Treatment, ²​Soil_humidity, ³​Electrical_conductivity
names(d0)
##  [1] "Group"                   "Week"                   
##  [3] "Date"                    "Species"                
##  [5] "PlantId"                 "Use"                    
##  [7] "Treatment"               "Soil_humidity"          
##  [9] "Electrical_conductivity" "Too_dry"                
## [11] "Plant_height"            "Leaf_number"            
## [13] "Leaf_length"             "Leaf_width"             
## [15] "Leaf_area"               "Chlorophyll_content"    
## [17] "Aerial_fresh_weight"     "Aerial_dry_weight"      
## [19] "Root_length"             "Roots_fresh_weight"     
## [21] "Roots_dry_weight"        "Mortality"              
## [23] "Comments"
Linear model
Plant Height

Most visual difference is in week 6 (w6)

d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Plant_height")]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Plant_height ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Plant_height
##            Df Sum Sq Mean Sq  F value    Pr(>F)    
## Treatment   2    466   232.8   7.5815  0.000672 ***
## Species     8  59922  7490.3 243.9062 < 2.2e-16 ***
## Residuals 198   6081    30.7                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
## 
## Call:
## lm(formula = Plant_height ~ Treatment + Species, data = d1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.3858  -2.9645  -0.2497   2.4476  21.2022 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  14.9645     1.0121  14.785  < 2e-16 ***
## Treatmenti                   -0.2671     0.9367  -0.285 0.775794    
## Treatments                   -3.4121     0.9402  -3.629 0.000362 ***
## SpeciesBeta vulgaris          3.9026     1.5058   2.592 0.010261 *  
## SpeciesHordeum vulgare       37.1571     1.4811  25.088  < 2e-16 ***
## SpeciesLolium perenne        26.6762     1.4811  18.011  < 2e-16 ***
## SpeciesPortulaca oleracea    -7.0476     1.4811  -4.758 3.76e-06 ***
## SpeciesRaphanus sativus       6.0476     1.4811   4.083 6.44e-05 ***
## SpeciesSolanum lycopersicum  44.8333     1.4811  30.271  < 2e-16 ***
## SpeciesSonchus oleraceus      4.4619     1.4811   3.013 0.002928 ** 
## SpeciesSpinacia oleracea      0.9381     1.4811   0.633 0.527208    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.542 on 198 degrees of freedom
## Multiple R-squared:  0.9085, Adjusted R-squared:  0.9039 
## F-statistic: 196.6 on 10 and 198 DF,  p-value: < 2.2e-16
ggplot(d1, aes(x = Treatment, y = Plant_height, fill = Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Leaf number
Week 1
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_number
##            Df Sum Sq Mean Sq F value  Pr(>F)    
## Treatment   2  13.74   6.871  4.1003 0.01799 *  
## Species     8 618.29  77.286 46.1175 < 2e-16 ***
## Residuals 199 333.50   1.676                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_number
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2  304.1  152.06  15.356 6.315e-07 ***
## Species     8 3971.2  496.40  50.128 < 2.2e-16 ***
## Residuals 198 1960.7    9.90                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Leaf length
Week 1
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_length
##            Df Sum Sq Mean Sq  F value Pr(>F)    
## Treatment   2    2.0    0.99   0.5525 0.5764    
## Species     8 5750.9  718.87 402.4626 <2e-16 ***
## Residuals 199  355.4    1.79                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_length
##            Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Treatment   2    79.8    39.9   6.1425  0.002581 ** 
## Species     8 30954.5  3869.3 595.6664 < 2.2e-16 ***
## Residuals 198  1286.2     6.5                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Leaf width
Week 1
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_width
##            Df Sum Sq Mean Sq  F value    Pr(>F)    
## Treatment   2   3.62   1.809   5.1477  0.006612 ** 
## Species     8 704.03  88.004 250.4667 < 2.2e-16 ***
## Residuals 199  69.92   0.351                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_width
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2   30.0   15.00  11.645 1.654e-05 ***
## Species     8 5014.3  626.78 486.695 < 2.2e-16 ***
## Residuals 198  255.0    1.29                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Leaf area
Week 1
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_area
##            Df  Sum Sq Mean Sq F value  Pr(>F)    
## Treatment   2   128.4   64.21  4.3722 0.01386 *  
## Species     8 11029.3 1378.66 93.8798 < 2e-16 ***
## Residuals 199  2922.4   14.69                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_area
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2   7957  3978.3  29.755 5.025e-12 ***
## Species     8 153005 19125.7 143.048 < 2.2e-16 ***
## Residuals 198  26473   133.7                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Chlorophyll content
Week 3
d1 <- d0[d0$Week == "W3" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Chlorophyll_content
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment  2 107.26  53.630  6.5679  0.002311 ** 
## Species    3 298.82  99.607 12.1985 1.258e-06 ***
## Residuals 78 636.91   8.166                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

###### Week 4

d1 <- d0[d0$Week == "W4" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Chlorophyll_content
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2   15.56   7.780  0.8133    0.4459    
## Species     5  533.87 106.774 11.1623 8.283e-09 ***
## Residuals 117 1119.17   9.566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

###### Week 5

d1 <- d0[d0$Week == "W5" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Chlorophyll_content
##            Df  Sum Sq Mean Sq F value Pr(>F)    
## Treatment   2   23.87   11.94  0.8801 0.4168    
## Species     6 2018.13  336.36 24.7988 <2e-16 ***
## Residuals 158 2143.01   13.56                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

###### Week 6

d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Chlorophyll_content
##            Df Sum Sq Mean Sq F value Pr(>F)    
## Treatment   2   20.7   10.36  0.5908 0.5551    
## Species     6 4832.8  805.47 45.9083 <2e-16 ***
## Residuals 158 2772.1   17.55                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Aerial fresh weight
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_fresh_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Aerial_fresh_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Aerial_fresh_weight
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2 1676.5  838.27  62.861 < 2.2e-16 ***
## Species     8 5586.5  698.32  52.366 < 2.2e-16 ***
## Residuals 199 2653.7   13.34                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
## 
## Call:
## lm(formula = Aerial_fresh_weight ~ Treatment + Species, data = d1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8478 -2.7565 -0.1866  2.4259 12.7100 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   4.4190     0.6667   6.628 3.12e-10 ***
## Treatmenti                   -3.8374     0.6173  -6.217 2.92e-09 ***
## Treatments                   -6.9069     0.6173 -11.190  < 2e-16 ***
## SpeciesBeta vulgaris         10.7119     0.9760  10.976  < 2e-16 ***
## SpeciesHordeum vulgare        5.6719     0.9760   5.812 2.43e-08 ***
## SpeciesLolium perenne         1.0219     0.9760   1.047 0.296342    
## SpeciesPortulaca oleracea     0.6705     0.9760   0.687 0.492895    
## SpeciesRaphanus sativus       8.0210     0.9760   8.218 2.61e-14 ***
## SpeciesSolanum lycopersicum  15.2586     0.9760  15.634  < 2e-16 ***
## SpeciesSonchus oleraceus     10.9362     0.9760  11.205  < 2e-16 ***
## SpeciesSpinacia oleracea      3.5290     0.9760   3.616 0.000379 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.652 on 199 degrees of freedom
## Multiple R-squared:  0.7324, Adjusted R-squared:  0.719 
## F-statistic: 54.46 on 10 and 199 DF,  p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_fresh_weight, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Aerial dry weight
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_dry_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Aerial_dry_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Aerial_dry_weight
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2  2.501  1.2503  16.477 2.488e-07 ***
## Species     8 53.289  6.6611  87.784 < 2.2e-16 ***
## Residuals 192 14.569  0.0759                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
## 
## Call:
## lm(formula = Aerial_dry_weight ~ Treatment + Species, data = d1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80832 -0.13388 -0.04531  0.14231  1.37768 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  0.204034   0.050646   4.029 8.08e-05 ***
## Treatmenti                  -0.087659   0.047782  -1.835   0.0681 .  
## Treatments                  -0.291585   0.047327  -6.161 4.16e-09 ***
## SpeciesBeta vulgaris         0.649136   0.077647   8.360 1.25e-14 ***
## SpeciesHordeum vulgare       0.612857   0.073621   8.325 1.56e-14 ***
## SpeciesLolium perenne        0.080000   0.073621   1.087   0.2786    
## SpeciesPortulaca oleracea   -0.004762   0.073621  -0.065   0.9485    
## SpeciesRaphanus sativus      0.801429   0.073621  10.886  < 2e-16 ***
## SpeciesSolanum lycopersicum  1.464286   0.073621  19.890  < 2e-16 ***
## SpeciesSonchus oleraceus     1.228283   0.079249  15.499  < 2e-16 ***
## SpeciesSpinacia oleracea     0.140000   0.073621   1.902   0.0587 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2755 on 192 degrees of freedom
## Multiple R-squared:  0.7929, Adjusted R-squared:  0.7821 
## F-statistic: 73.52 on 10 and 192 DF,  p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_dry_weight, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

Root length
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Root_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Root_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Root_length
##            Df  Sum Sq Mean Sq F value Pr(>F)    
## Treatment   2    38.4   19.19  0.5121    0.6    
## Species     8 19277.3 2409.66 64.2873 <2e-16 ***
## Residuals 199  7459.1   37.48                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
## 
## Call:
## lm(formula = Root_length ~ Treatment + Species, data = d1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2830  -2.8891  -0.4475   1.9244  27.2872 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   4.1226     1.1178   3.688 0.000291 ***
## Treatmenti                   -0.3986     1.0349  -0.385 0.700500    
## Treatments                    0.6394     1.0349   0.618 0.537392    
## SpeciesBeta vulgaris         16.0534     1.6363   9.811  < 2e-16 ***
## SpeciesHordeum vulgare       20.5303     1.6363  12.547  < 2e-16 ***
## SpeciesLolium perenne        10.8008     1.6363   6.601 3.63e-10 ***
## SpeciesPortulaca oleracea     0.2543     1.6363   0.155 0.876635    
## SpeciesRaphanus sativus      23.3591     1.6363  14.276  < 2e-16 ***
## SpeciesSolanum lycopersicum  20.8115     1.6363  12.719  < 2e-16 ***
## SpeciesSonchus oleraceus     23.0820     1.6363  14.107  < 2e-16 ***
## SpeciesSpinacia oleracea      3.5060     1.6363   2.143 0.033355 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.122 on 199 degrees of freedom
## Multiple R-squared:  0.7214, Adjusted R-squared:  0.7074 
## F-statistic: 51.53 on 10 and 199 DF,  p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Root_length, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))

part3: PCA analysis

in this part we are going to do a PCA analysis for the data

#importing data

#we already imported the data in the previous parts, that's why the functions of importing the data are commented

#readxl::read_excel("data_dec/water_stress.xlsx")
#d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")

Next we need to make a subset for the data with only numerical variables and exclude the other variables, scale the data and after that we will exclude all the missing data

PCA_data <- d0[c(8:9, 11:21)]
PCA_data_scaled <- as.data.frame(scale(PCA_data))
view(PCA_data_scaled)

# exclude missing values NA

PCA_data01 <- na.exclude(PCA_data_scaled)

now we are going to do a PCA of the data

PCA <- prcomp(PCA_data01, scale = FALSE)
print(PCA)
## Standard deviations (1, .., p=13):
##  [1] 2.64322013 1.42965415 1.25678058 1.15025582 0.60623801 0.45306262
##  [7] 0.27287074 0.23294676 0.21466349 0.18793117 0.16714952 0.12251860
## [13] 0.04945087
## 
## Rotation (n x k) = (13 x 13):
##                                  PC1          PC2         PC3          PC4
## Soil_humidity           -0.004484399  0.049697147 -0.11723720 -0.248423655
## Electrical_conductivity -0.008044875 -0.006001977 -0.01643564 -0.037264645
## Plant_height            -0.317876839 -0.019322001 -0.14588964 -0.383332098
## Leaf_number              0.191695542 -0.636215879  0.64619422 -0.354302767
## Leaf_length             -0.204597818 -0.120259300 -0.25419740 -0.067658522
## Leaf_width              -0.403324908 -0.113622120 -0.12480722 -0.279492301
## Leaf_area               -0.460454617 -0.066851259  0.02718646 -0.113601184
## Chlorophyll_content     -0.028883692 -0.719939012 -0.40819495  0.500269950
## Aerial_fresh_weight     -0.354084459 -0.114177787  0.04662476  0.006153558
## Aerial_dry_weight       -0.308108066 -0.059717159  0.04332706 -0.089751120
## Root_length             -0.231145679 -0.001854812  0.10408313  0.102712787
## Roots_fresh_weight      -0.362604484  0.150905948  0.49832289  0.524473335
## Roots_dry_weight        -0.198800432  0.053083675  0.19070678  0.157603575
##                                 PC5         PC6         PC7         PC8
## Soil_humidity            0.81679444  0.28302620  0.10935536  0.15755331
## Electrical_conductivity  0.36647667  0.21907930 -0.16776733 -0.28746814
## Plant_height            -0.01464742 -0.29041881 -0.33800754 -0.39856999
## Leaf_number              0.04191206 -0.05574685 -0.04328160  0.03493110
## Leaf_length              0.06291150 -0.23212135 -0.20135610  0.18225842
## Leaf_width               0.04636750 -0.30319480 -0.03303809  0.05749506
## Leaf_area               -0.10873503  0.21608704  0.05655006  0.33231120
## Chlorophyll_content      0.12018037 -0.01247082  0.02255428 -0.09369632
## Aerial_fresh_weight     -0.26351706  0.68727077 -0.27316223  0.10132413
## Aerial_dry_weight       -0.07214602  0.12124570  0.74137362 -0.50267692
## Root_length              0.08617403 -0.28072317  0.35383667  0.52643216
## Roots_fresh_weight       0.27132068 -0.14341942 -0.21909866 -0.15424642
## Roots_dry_weight         0.09224390 -0.07332690 -0.03120358 -0.11337469
##                                  PC9         PC10         PC11        PC12
## Soil_humidity           -0.265854675  0.256000023 -0.021742611  0.03570246
## Electrical_conductivity  0.544832993 -0.608963020  0.141407519 -0.13048759
## Plant_height            -0.211222723  0.177138574  0.426567659 -0.32794203
## Leaf_number             -0.007399102 -0.001980915 -0.066542393 -0.05415789
## Leaf_length             -0.023428358 -0.157837279 -0.727494654 -0.43357987
## Leaf_width               0.043504934 -0.215075601 -0.081496233  0.75990672
## Leaf_area                0.612827517  0.447294277  0.086226340 -0.12079011
## Chlorophyll_content     -0.011741586  0.112225276  0.160531959  0.03938422
## Aerial_fresh_weight     -0.410458809 -0.251217825  0.010582180  0.02922203
## Aerial_dry_weight       -0.065107103 -0.045205220 -0.212166820 -0.10698135
## Root_length             -0.175927507 -0.407510329  0.405559238 -0.27149911
## Roots_fresh_weight      -0.033343533  0.120409932 -0.120142968  0.05697080
## Roots_dry_weight        -0.069956207  0.051885440 -0.002888916 -0.01928205
##                                 PC13
## Soil_humidity           -0.007103540
## Electrical_conductivity  0.021300917
## Plant_height            -0.084793094
## Leaf_number             -0.002703077
## Leaf_length              0.013467758
## Leaf_width               0.001399967
## Leaf_area                0.008221652
## Chlorophyll_content      0.004457611
## Aerial_fresh_weight     -0.012514819
## Aerial_dry_weight       -0.081265406
## Root_length             -0.037644747
## Roots_fresh_weight      -0.350823081
## Roots_dry_weight         0.927778627

now we will plot the PCA results

# Plotting the PCA results
# install.packages("factoextra") 
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/factoextra")  

library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)

fviz_eig(PCA)

# graph for individuals



fviz_pca_ind(PCA,
             col.ind = "cos2", 
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
)
## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
<<<<<<< HEAD

=======

>>>>>>> 06bbc6c901366b69dcdb2a5664eb2474b4a5aa7c
# graph of variable


fviz_pca_var(PCA,
             col.var = "contrib", # Color by contributions to the PC
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
)
<<<<<<< HEAD

=======

>>>>>>> 06bbc6c901366b69dcdb2a5664eb2474b4a5aa7c